package com.blacktea.webmagic.demo.util;

import cn.hutool.core.io.FileTypeUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.lang.Validator;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.ReUtil;
import com.alibaba.fastjson.JSON;
import com.blacktea.webmagic.demo.config.MyHttpClientDownloader;
import com.blacktea.webmagic.demo.constant.DownConstant;
import com.blacktea.webmagic.demo.constant.FileTypeConstant;
import com.blacktea.webmagic.demo.domain.Download;
import com.blacktea.webmagic.demo.domain.Field;
import com.blacktea.webmagic.demo.domain.ListPage;
import com.blacktea.webmagic.demo.processor.AbstractPageProcessor;
import com.blacktea.webmagic.demo.processor.CSDNItemArticleProcessor;
import com.blacktea.webmagic.demo.processor.CSDNListProcessor;
import com.blacktea.webmagic.demo.strategy.DownloadContext;
import com.overzealous.remark.Remark;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

/**
 * Static helpers for the CSDN crawler: building WebMagic {@link Spider}/{@link Site}
 * instances, extracting field values from a crawled page, turning the extracted
 * values into "path -&gt; content" maps, and writing those maps into a ZIP stream.
 *
 * @author black tea
 * @date 2021/9/26 10:43
 */
@Slf4j
public class WebMagicUtil {

    /** Connect timeout, in milliseconds, used when downloading an image. */
    private static final int IMG_CONNECT_TIMEOUT_MS = 10_000;

    /** Read timeout, in milliseconds, used when downloading an image. */
    private static final int IMG_READ_TIMEOUT_MS = 30_000;

    /** Matches a {@code src} attribute; capture group 1 is the attribute value. */
    private static final String IMG_SRC_REGEX = "src\\s*=\\s*\"?(.*?)(\"|>|\\s+)";

    /** Matches an {@code alt} attribute; capture group 1 is the attribute value. */
    private static final String IMG_ALT_REGEX = "alt\\s*=\\s*\"?(.*?)(\"|>|\\s+)";

    /**
     * Creates a spider whose downloader is {@link MyHttpClientDownloader}.
     *
     * @param pageProcessor processor that handles every downloaded page
     * @return the configured spider
     */
    public static Spider create(PageProcessor pageProcessor){
        return Spider.create(pageProcessor)
                .setDownloader(new MyHttpClientDownloader());
    }

    /**
     * Creates a single-threaded spider with a start URL and the given pipelines.
     *
     * @param pageProcessor processor that handles every downloaded page
     * @param pageUrl       start URL
     * @param pipelines     pipelines that receive the extracted results
     * @return the configured spider
     */
    public static Spider create(PageProcessor pageProcessor, String pageUrl, List<Pipeline> pipelines){
        return create(pageProcessor)
                .setPipelines(pipelines)
                .addUrl(pageUrl)
                .thread(1);
    }

    /**
     * Varargs variant of {@link #create(PageProcessor, String, List)}.
     *
     * @param pageProcessor processor that handles every downloaded page
     * @param pageUrl       start URL
     * @param pipeline      pipelines that receive the extracted results
     * @return the configured spider
     */
    public static Spider create(PageProcessor pageProcessor, String pageUrl,Pipeline... pipeline){
        // Collect into a fresh list so the spider never shares the caller's array.
        List<Pipeline> pipelines = Arrays.stream(pipeline).collect(Collectors.toList());
        return create(pageProcessor,pageUrl,pipelines);
    }

    /**
     * Same as {@link #create(PageProcessor, String, Pipeline...)} but with an
     * explicit thread count.
     *
     * @param pageProcessor processor that handles every downloaded page
     * @param pageUrl       start URL
     * @param thread        number of crawler threads
     * @param pipeline      pipelines that receive the extracted results
     * @return the configured spider
     */
    public static Spider create(PageProcessor pageProcessor, String pageUrl, int thread, Pipeline... pipeline){
        return create(pageProcessor,pageUrl,pipeline).thread(thread);
    }

    /**
     * Builds a {@link Site} with the given timeout, retry count and sleep time.
     *
     * @param timeOut    value passed to {@code Site.setTimeOut}
     * @param retryTimes value passed to {@code Site.setRetryTimes}
     * @param sleepTime  value passed to {@code Site.setSleepTime}
     * @return the configured site
     */
    public static Site site(int timeOut, int retryTimes, int sleepTime){
        return Site.me()
                .setTimeOut(timeOut)
                .setRetryTimes(retryTimes)
                .setSleepTime(sleepTime);
    }

    /**
     * Builds a {@link Site} with this project's defaults (timeout 300, 3 retries, sleep 100).
     *
     * @return the configured site
     */
    public static Site site(){
        // NOTE(review): WebMagic interprets the timeout in milliseconds, so 300 is very
        // short for a remote site - confirm this value is intentional.
        return site(300,3,100);
    }

    /**
     * Applies the selector described by {@code field} to the page's HTML.
     *
     * @param field selector type and expression
     * @param page  downloaded page
     * @return the selection result
     * @throws IllegalStateException (via {@code Assert.state}) when the field type is not supported
     */
    public static Selectable matchVal(Field field, Page page){
        Selectable value = null;
        switch (field.getType()){
            case Regex:
                value = page.getHtml().regex(field.getValue());
                break;
            case XPath:
                value = page.getHtml().xpath(field.getValue());
                break;
            case Css:
                value = page.getHtml().css(field.getValue());
                break;
            case JsonPath:
                value = page.getHtml().jsonPath(field.getValue());
                break;
            default:
                Assert.state(false,"该Field类型值->{},是不合法的!", JSON.toJSONString(field));
        }
        return value;
    }

    /**
     * Builds the page processor for a download request. List pages get a
     * {@link CSDNListProcessor}, every other page a {@link CSDNItemArticleProcessor}.
     *
     * @param download download request; its page definition supplies the fields to extract
     * @param all      {@code true} to store every match of a field, {@code false} to store only the first
     * @return the processor to hand to the spider
     */
    public static AbstractPageProcessor csdnProcess(Download download, boolean all){
        DownloadContext.context(download).ifPresent(context -> context.executCheck(download));
        com.blacktea.webmagic.demo.domain.Page iPage = download.getPage();
        Site site = WebMagicUtil.site();
        boolean isListPage = iPage instanceof ListPage;
        // Target pages are added only once, and only from a list page. The flag is claimed
        // with a single compareAndSet so two crawler threads cannot both add the targets.
        AtomicBoolean targetRequestsPending = new AtomicBoolean(true);
        Consumer<Page> pageConsumer = page -> {
            if (isListPage && targetRequestsPending.compareAndSet(true,false)){
                page.addTargetRequests(page.getHtml().links().regex(((ListPage) iPage).getTargetUrl()).all());
            }
            List<Field> fields = iPage.getFields();
            if (CollectionUtils.isEmpty(fields)){
                return;
            }
            for (Field field : fields){
                Object selectableValue = getSelectableValue(field, page, all);
                if (selectableValue != null){
                    page.putField(field.getKey(), selectableValue);
                }
            }
        };
        if (isListPage){
            return new CSDNListProcessor(site, pageConsumer);
        }
        return new CSDNItemArticleProcessor(site,pageConsumer);
    }

    /**
     * Evaluates one field against the page.
     *
     * @param field field to evaluate
     * @param page  downloaded page
     * @param all   {@code true} returns all matches (used for the zip export),
     *              {@code false} returns only the first match (used for a single md file)
     * @return a {@code List<String>} when {@code all} is set, otherwise a {@code String};
     *         {@code null} when the selector produced no {@link Selectable}
     */
    private static Object getSelectableValue(Field field,Page page, boolean all){
        Selectable selectable = WebMagicUtil.matchVal(field, page);
        if (selectable == null){
            return null;
        }
        // Serialising every match is only worth the cost when debug logging is on.
        if (log.isDebugEnabled()){
            log.debug("当前field.key={},匹配的结果为:{}",field.getKey(),JSON.toJSONString(selectable.all()));
        }
        if (all){
            return selectable.all();
        }
        return selectable.get();
    }

    /**
     * Converts the extracted results of one page into two maps, stored under
     * {@code DownConstant.MD_DATA_MAP} (path -&gt; markdown text) and
     * {@code DownConstant.IMG_DATA_MAP} (path without extension -&gt; image URL).
     *
     * <p>Paths use {@code \} as separator. With an index the layout is
     * {@code index\fieldKey\md\n.ext} and {@code index\fieldKey\img\name};
     * without one it is {@code fieldKey\n.ext} and {@code fieldKey\name}.
     *
     * @param download     download request; its page definition supplies the fields
     * @param resultItems  values extracted from the page, keyed by field key
     * @param indexInteger per-page counter used as the top-level folder, or {@code null} for no
     *                     folder; incremented once per call when the page is a list page
     * @param imgInteger   counter used to name images that have no usable {@code alt}
     * @param mdInteger    counter used to name markdown files
     * @return the two maps; an empty map when the page defines no fields
     */
    public static Map<String, Map<String, String>> getDataMap(Download download, ResultItems resultItems,AtomicInteger indexInteger, AtomicInteger imgInteger, AtomicInteger mdInteger){
        Map<String, Map<String, String>> dataMap = new HashMap<>();
        List<Field> fields = download.getPage().getFields();
        if (CollectionUtils.isEmpty(fields)){
            return dataMap;
        }
        // The counter is optional everywhere else in this method, so it must be here too.
        if (indexInteger != null && download.getPage() instanceof ListPage){
            indexInteger.incrementAndGet();
        }
        Integer index = indexInteger == null ? null : indexInteger.get();
        Map<String, String> mdMap = new HashMap<>();
        Map<String, String> imgMap = new HashMap<>();
        // Created lazily and reused for every fragment of this call.
        Remark remark = null;
        for (Field field : fields){
            FileTypeConstant.FileTypeEnum fileType = field.getFileType();
            String fieldKey = field.getKey();
            List<String> keys = toStringList(resultItems.get(fieldKey));
            if (keys.isEmpty()){
                continue;
            }
            switch (fileType){
                case MD:
                    for (String key : keys){
                        if (StringUtils.isEmpty(key)){
                            continue;
                        }
                        if (remark == null){
                            remark = new Remark();
                        }
                        String markdown = remark.convertFragment(key);
                        String mdKey = index == null
                                ? fieldKey + "\\" + mdInteger.incrementAndGet() + fileType.getFileType()
                                : index + "\\" + fieldKey + "\\md\\" + mdInteger.incrementAndGet() + fileType.getFileType();
                        mdMap.put(mdKey,markdown);
                    }
                    break;
                case PNG:
                    imgMap.putAll(WebMagicUtil.getImgMap(fieldKey,keys,index,imgInteger));
                    break;
                default:
                    Assert.state(false,"当前方法DownloadPipeline.download()-page-的feild.filetype类型不支持!");
            }
        }
        dataMap.put(DownConstant.MD_DATA_MAP,mdMap);
        dataMap.put(DownConstant.IMG_DATA_MAP,imgMap);
        return dataMap;
    }

    /**
     * Normalises a stored result value to a list of strings. The processor stores either a
     * list (all matches) or a single string (first match), so both shapes are accepted.
     *
     * @param raw value read from the result items, may be {@code null}
     * @return the non-null elements as strings; never {@code null}
     */
    private static List<String> toStringList(Object raw){
        if (raw == null){
            return Collections.emptyList();
        }
        if (raw instanceof Collection){
            return ((Collection<?>) raw).stream()
                    .filter(Objects::nonNull)
                    .map(Object::toString)
                    .collect(Collectors.toList());
        }
        return Collections.singletonList(raw.toString());
    }

    /**
     * Builds the "path -&gt; image URL" map for a list of {@code <img>} fragments.
     * Fragments without a {@code src} attribute are skipped.
     *
     * @param fieldKey   key of the field the images belong to
     * @param keys       {@code <img>} fragments
     * @param index      top-level folder index, or {@code null} for no folder
     * @param imgInteger counter used to name images that have no usable {@code alt}
     * @return path (without file extension) to image URL
     */
    public static Map<String, String> getImgMap(String fieldKey, List<String> keys,Integer index, AtomicInteger imgInteger){
        Map<String, String> imgMap = new HashMap<>();
        if (CollectionUtils.isEmpty(keys)){
            return imgMap;
        }
        for (String key : keys){
            if (StringUtils.isEmpty(key)){
                continue;
            }
            String imgUrl = WebMagicUtil.getImgUrlBySrc(key);
            if (StringUtils.isEmpty(imgUrl)){
                continue;
            }
            String name = WebMagicUtil.getImgNameBySrc(key,imgInteger);
            if (index == null){
                imgMap.put(fieldKey+"\\"+name,imgUrl);
            }else {
                imgMap.put(index+"\\"+fieldKey +"\\img\\"+name,imgUrl);
            }
        }
        return imgMap;
    }

    /**
     * Extracts the value of the {@code src} attribute from an {@code <img>} fragment.
     *
     * @param imgSrc HTML fragment
     * @return the attribute value, or {@code null} when the fragment has no {@code src} attribute
     */
    public static String getImgUrlBySrc(String imgSrc){
        // Group 1 is the bare value: no "src=" prefix, quotes, trailing '>' or whitespace.
        return ReUtil.get(IMG_SRC_REGEX, imgSrc, 1);
    }

    /**
     * Derives a file name for an image from its {@code alt} attribute, falling back to the
     * next value of {@code imgInteger} when the attribute is missing or empty.
     *
     * @param imgSrc     HTML fragment
     * @param imgInteger fallback name counter
     * @return the image name, without file extension
     */
    public static String getImgNameBySrc(String imgSrc, AtomicInteger imgInteger){
        String name = ReUtil.get(IMG_ALT_REGEX, imgSrc, 1);
        if (StringUtils.isEmpty(name)){
            name = String.valueOf(imgInteger.incrementAndGet());
        }
        return name;
    }

    /**
     * Writes the markdown and image maps produced by
     * {@link #getDataMap(Download, ResultItems, AtomicInteger, AtomicInteger, AtomicInteger)}
     * into the ZIP stream. Images are downloaded while this method runs.
     *
     * @param zos     target ZIP stream; it is not closed here
     * @param dataMap maps keyed by {@code DownConstant.MD_DATA_MAP} / {@code DownConstant.IMG_DATA_MAP}
     */
    public static void writeZosZip2(ZipOutputStream zos, Map<String, Map<String, String>> dataMap){
        if (MapUtil.isEmpty(dataMap)){
            return;
        }
        Map<String, String> mdMap = dataMap.get(DownConstant.MD_DATA_MAP);
        if (MapUtil.isNotEmpty(mdMap)) {
            WebMagicUtil.writeZosMd(mdMap,zos);
        }
        Map<String, String> imgMap = dataMap.get(DownConstant.IMG_DATA_MAP);
        if (MapUtil.isNotEmpty(imgMap)) {
            Map<String, InputStream> inputStreamMap = WebMagicUtil.inputStreamMap(imgMap);
            WebMagicUtil.writeZosImg(inputStreamMap,zos);
        }
    }

    /**
     * Writes each markdown text as a UTF-8 ZIP entry. A failing entry is logged and skipped.
     *
     * @param inputMdMap entry path to markdown text
     * @param zos        target ZIP stream
     */
    public static void writeZosMd(Map<String,String> inputMdMap,ZipOutputStream zos){
        inputMdMap.forEach((k,v) -> {
            try {
                zos.putNextEntry(new ZipEntry(zipEntryName(k)));
                byte[] bytes = v.getBytes(StandardCharsets.UTF_8);
                zos.write(bytes);
                zos.closeEntry();
            } catch (IOException e) {
                log.error("将多个md文件存入zip出现异常",e);
            }
        });
    }

    /**
     * Writes each stream as a ZIP entry and closes the stream afterwards, whether or not
     * the write succeeded. A failing entry is logged and skipped.
     *
     * @param inputStreamMap entry path to image content
     * @param zos            target ZIP stream
     */
    public static void writeZosImg(Map<String,InputStream> inputStreamMap, ZipOutputStream zos){
        inputStreamMap.forEach((k,v) -> {
            if (v == null){
                return;
            }
            try {
                // Read first, so a failed read does not leave a half-open entry behind.
                byte[] bytes = IoUtil.readBytes(v);
                zos.putNextEntry(new ZipEntry(zipEntryName(k)));
                zos.write(bytes);
                zos.closeEntry();
            } catch (IOException e) {
                log.error("将多个img文件存入zip出现异常",e);
            } finally {
                IoUtil.close(v);
            }
        });
    }

    /**
     * Converts a map key into a ZIP entry name. The ZIP format only recognises {@code /}
     * as a path separator, while the keys are built with {@code \}.
     *
     * @param key path built with backslashes
     * @return the same path with forward slashes
     */
    private static String zipEntryName(String key){
        return key.replace('\\', '/');
    }

    /**
     * Downloads every image and returns its content keyed by {@code path.type}.
     *
     * <p>The extension comes from the downloaded bytes rather than from the URL, because URLs
     * such as {@code https://img-blog.csdnimg.cn/img_convert/1b2de10c265e39edb36fce24c81c5ade.png#pic_center}
     * make parsing the name unreliable. Images that cannot be downloaded are skipped; when the
     * type cannot be detected the path is used without an extension.
     *
     * @param imgMap path (without extension) to image URL
     * @return path (with extension) to an in-memory stream of the image content
     */
    public static Map<String,InputStream> inputStreamMap(Map<String, String> imgMap){
        Map<String, InputStream> inputStreamMap = new HashMap<>();
        imgMap.forEach((k,v) -> {
            byte[] bytes = readImgBytes(v);
            if (bytes == null || bytes.length == 0){
                return;
            }
            // One stream over the buffer for type detection, a second one for the caller.
            String type = FileTypeUtil.getType(new ByteArrayInputStream(bytes));
            String entryKey = StringUtils.isEmpty(type) ? k : k + "." + type;
            inputStreamMap.put(entryKey, new ByteArrayInputStream(bytes));
        });
        return inputStreamMap;
    }

    /**
     * Downloads an image fully into memory and closes the network stream.
     *
     * @param imgUrl image URL
     * @return the image bytes, or {@code null} when the download or the read failed
     */
    private static byte[] readImgBytes(String imgUrl){
        InputStream in = downloadImg(imgUrl);
        if (in == null){
            return null;
        }
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1){
                baos.write(buffer, 0, read);
            }
            return baos.toByteArray();
        } catch (IOException e) {
            log.error("Failed to read image content from {}", imgUrl, e);
            return null;
        } finally {
            IoUtil.close(in);
        }
    }

    /**
     * Opens an HTTP(S) connection to the image and returns its response stream.
     * The caller is responsible for closing the returned stream.
     *
     * @param imgUrl image URL; only {@code http} and {@code https} are accepted
     * @return the response stream, or {@code null} when the URL is not HTTP(S) or the request failed
     */
    public static InputStream downloadImg(String imgUrl) {
        try {
            URL url = new URL(imgUrl);
            String protocol = url.getProtocol();
            // Only HTTP(S) connections can be cast to HttpURLConnection below.
            if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)){
                log.error("Unsupported image URL protocol: {}", imgUrl);
                return null;
            }
            HttpURLConnection conn=(HttpURLConnection) url.openConnection();
            // Without timeouts a stalled server would block the export indefinitely.
            conn.setConnectTimeout(IMG_CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(IMG_READ_TIMEOUT_MS);
            DownConstant.RequestHeadEnum userAgent = DownConstant.RequestHeadEnum.USER_AGENT;
            conn.setRequestProperty(userAgent.getKey(),userAgent.getValue());
            conn.connect();
            InputStream inputStream = conn.getInputStream();
            log.debug("\n当前获取图片-{}完成。",imgUrl);
            return inputStream;
        } catch (IOException e) {
            log.error("获取{}图片失败!",imgUrl,e);
        }
        return null;
    }
}
